"""
Parse the title and abstract text out of an article HTML page.
"""

from html.parser import HTMLParser


class ABSParser(HTMLParser):
    """Extract an article title and abstract from an HTML document.

    Parsing proceeds in two consecutive phases:

    1. Title: collection starts at the first ``<div>`` whose only attribute
       is a ``class`` whose value begins with ``medium-9`` and stops at the
       next ``</h3>``.
    2. Abstract: once the title is finished, collection starts at the first
       ``<div>`` carrying exactly ``class="content"`` and
       ``data-loaded="yes"`` (in either order) and stops at the next
       ``</p>``.

    While a phase is collecting, every piece of character data is appended
    to the result, so nested markup (for example the ``<span>`` wrappers
    around formulas) contributes only its text.

    The phase flags are not cleared by ``reset()``; use a fresh instance for
    each document.

    Attributes:
        ttltxt: Title text collected so far.
        abstxt: Abstract text collected so far.
    """

    # Class-level defaults; the first assignment through ``self`` creates a
    # per-instance value, so instances do not share collected text.
    ttltxt = ""
    abstxt = ""
    _trigger = False   # True while character data is being collected
    _ttl_fin = False   # True once the title's closing </h3> was seen
    _abs_fin = False   # True once the abstract's closing </p> was seen
    # Editor suggestions are taken from previously fetched content and are
    # intentionally not handled by this parser.

    def handle_starttag(self, tag, attrs):
        """Start collecting when the container ``<div>`` of a phase opens.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs; ``value`` is ``None``
                for attributes written without a value.
        """
        if tag != 'div' or self._abs_fin:
            return

        if not self._ttl_fin:
            # Title phase: only a div with a single class attribute counts.
            if len(attrs) != 1:
                return
            name, value = attrs[0]
            # ``value`` is None for a bare ``<div class>``; treat it as empty
            # instead of failing on ``None.startswith``.
            if name == "class" and (value or "").startswith("medium-9"):
                self._trigger = True
            return

        # Abstract phase: exactly class="content" and data-loaded="yes".
        if len(attrs) != 2:
            return
        found = dict(attrs)  # attribute order is not significant in HTML
        if found.get("class") == "content" and found.get("data-loaded") == "yes":
            self._trigger = True

    def handle_endtag(self, tag):
        """Stop collecting when the closing tag of the active phase is seen.

        Only the terminator belonging to the current phase is honoured:
        ``</h3>`` ends the title and ``</p>`` ends the abstract. Reacting to
        ``</p>`` during the title phase would mark the abstract as finished
        before it was ever read.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
        """
        if not self._trigger:
            return

        if not self._ttl_fin:
            if tag == 'h3':
                self._ttl_fin = True
                self._trigger = False
        elif tag == 'p':
            self._abs_fin = True
            self._trigger = False

    def handle_data(self, data):
        """Append character data to the text of the phase being collected.

        Args:
            data: Text chunk supplied by ``HTMLParser``.
        """
        if not self._trigger:
            return

        if not self._ttl_fin:
            self.ttltxt += data
        elif not self._abs_fin:
            self.abstxt += data
